************************************************************
*****     			CLEAN REGISTRY DATA			       *****
************************************************************

/* CONTENTS:
1. Code to clean and manage birth registry data  
2. Code to manage labor data-- put in wide format
	2.1. Clean and rename variables for fathers
	2.2. Clean and rename variables for mothers
3. Match fathers with children
4. Child hospitalizations 
5. Create dataset of unique hospitalizations
6. Mortality (fatal shocks)
7. Create dataset of unique fatal shocks
8. Mental health
9. Parental hospitalization
10. Parental mortality
11. Parental transfers
12. Parental allowances
13. Parental location
14. Care burden 
15. Cancer and Skin Conditions 
16. Matched sample for Mutual Shocks (Hospitalizations)
17. Matched sample for Mutual Shocks (Mortality)
18. Aggregated child hospital stays
19. Chronic vs nonchronic conditions

*/

*********************************************  
*** 1. Birth registry: CLEAN AND RENAME VARIABLES 
*********************************************

{
    
* Load the custom-made birth registry extract
use "D:\b44\custom-made\synre_shnro", clear

* Convert the string-coded child-level fields to numeric in one pass
* (statistical year, sex, mortality at birth, number of births)
destring tilastovuosi sukup kuolleisuus synnytys_paltu, replace

** Year
rename tilastovuosi year

** Sex: male is 1 when sex == 1, 0 when sex == 2, missing for any other value
rename sukup sex // values?
g male = cond(sex == 1, 1, cond(sex == 2, 0, .))

** Birth order twins: order letter "A" -> 1, "B" -> 2, anything else missing
generate birth_order_twin = cond(jarjestyskirjain == "A", 1, cond(jarjestyskirjain == "B", 2, .))

** Mortality at birth
rename kuolleisuus mortality

** Number of births: the data are left sorted on this variable
sort synnytys_paltu

***************************************************************

* Mother variables
* Builds the mother identifier and the nationality / marital / cohabitation
* indicators. Code 9 means "Unknown" in these registry fields (see the value
* label and the cohabitation note below), so every binary derived here is set
* to missing -- not 0 -- when the source value is missing or 9.

** Create unique mother id (numeric group id of the mother's person id)
egen mother_id = group(aiti_shnro)

** Create nationality: 1=Finnish, 2=Foreign (9 = unknown -> missing)
destring kansalaisuus, replace
gen nationality = kansalaisuus
recode nationality (9 = .)

** Create Finnish binary variable
gen finnish = (nationality == 1) if nationality != .

** Civil status and cohabitation status
destring siviilisaaty, replace
rename siviilisaaty marital_status
lab define civilstatus 1 "Non-married" 2 "Married" ///
3 "Legally separated" 4 "Divorced" 5 "Widowed" 6 "Registered partner" ///
7 "Separated after registered partnership" 9 "Unknown"
lab values marital_status civilstatus

** Married binary
* Fix: unknown civil status (9) used to be counted as "not married" (0);
* it is now missing, matching how nationality treats code 9.
gen married = (marital_status == 2) if marital_status != . & marital_status != 9

** Single binary
destring avoliitto, replace
rename avoliitto cohabitation // "Relationship status", 1 for relationship, 2 for non relationship, 9 unknown

* Fix: unknown cohabitation (9) used to be counted as "not single" (0); now missing.
gen single = (cohabitation == 2) if cohabitation != . & cohabitation != 9

* Recode time variables / medical birth register
** Mother birth: year, month and day components of the mother's birth date
* NOTE(review): the date functions are applied without dofc() here, unlike the
* child dates below -- confirm AIDIN_SYNTYMAPVM is stored as a daily date.
foreach part in year month day {
	gen mother_b_`part' = `part'(AIDIN_SYNTYMAPVM)
}

** Mother age
rename AITI_IKA mother_age

** Child death: datetime converted to a daily date, then split into components
gen child_d_date = dofc(LAPSEN_KUOLINPVM) // ”Child’s date of death (if within 365 days after birth)”
foreach part in year month day {
	gen child_d_`part' = `part'(child_d_date)
}

** Child birth: daily date plus year, month, day, week and quarter
gen child_b_date = dofc(LAPSEN_SYNTYMAPVM)
format child_b_date %td
foreach part in year month day week quarter {
	gen child_b_`part' = `part'(child_b_date)
}

** Date baby discharged from hospital
* NOTE(review): copied without dofc(), unlike the mother's discharge date below --
* confirm LAPSEN_LAHTOPVM is already a daily date.
gen child_home_from_hospital = LAPSEN_LAHTOPVM
format child_home_from_hospital %td

** Date of admission and discharge of the mother
gen mother_admission_date=dofc(AITI_TULOPVM)
gen mother_discharge_date=dofc(AITI_LAHTOPVM)
format mother_admission_date mother_discharge_date %td

** Generate length of stay of mother: from child_b_date to discharge
g length_stay = mother_discharge_date - child_b_date
* There are some errors, so the stay is capped at the admission-to-discharge
* span whenever that span is shorter. min() ignores a missing span, and rows
* with a missing length_stay are left untouched.
replace length_stay = min(length_stay, mother_discharge_date - mother_admission_date) if length_stay != .



** Identifier for the first birth of each mother observed in the data
** (regardless of previous_births): births are numbered within mother by child birth date
bys mother_id (child_b_date): gen nbirths_cohort = _n
gen first_birth_cohort = (nbirths_cohort == 1)

** Previous c-section: asektio "K" -> 1, "E" -> 0, anything else missing
gen previous_csection = cond(asektio == "K", 1, cond(asektio == "E", 0, .))

** Previous births
rename aiemmatsynnytykset previous_births
rename aiemmatraskaudet previous_pregnancies

* first_birth is 1 only when the registry reports no previous births, this is
* the mother's first birth in the data, and no previous c-section is recorded;
* it is missing when previous_births is missing and 0 otherwise.
gen first_birth = (previous_births == 0 & first_birth_cohort == 1 & previous_csection != 1) if previous_births != .


** SES categories for mothers
* Each indicator below is 1 for the listed ses codes, 0 for any other
* non-missing code, and missing when ses itself is missing.
destring soseko, replace
rename soseko ses

* Code 99 is treated as missing
replace ses = . if ses == 99

g unemployed = inlist(ses, 81) if ses != .

g selfemployed = inlist(ses, 11, 12, 21, 22, 23, 24, 29) if ses != .

g highskilled_whitecollar = inlist(ses, 31, 32, 33, 34, 39) if ses != .

g lowskilled_whitecollar = inlist(ses, 41, 42, 43, 44, 49) if ses != .

g manualworkers = inlist(ses, 51, 52, 53, 54, 59) if ses != .

g students = inlist(ses, 60) if ses != .

g pensioners = inlist(ses, 71, 72, 73, 74, 79) if ses != .

g other_employ = inlist(ses, 82) if ses != .

	
* Create mortality
* Days between birth and death; missing when no death date is recorded
gen death_after_birth = child_d_date - child_b_date

* Week / month / year mortality
* Each indicator is 0 when no death is recorded, 1 when the death falls inside
* the window, and missing when the death falls after the window.
* NOTE(review): deaths after the window become missing rather than 0, and
* negative day counts are left unchanged by recode -- confirm both are intended.

* Week mortality
gen week_mortality = death_after_birth
recode week_mortality (.=0) (0/7=1) (8/max=.)

* Month mortality
gen month_mortality = death_after_birth
recode month_mortality (.=0) (0/31=1) (32/max=.)

* Year mortality
gen year_mortality = death_after_birth
recode year_mortality (.=0) (0/365=1) (366/max=.)

* Gestational length in weeks, from the length estimate divided by 7
gen gestation_weeks = KESTOARVIO_PV / 7

rename syntymapaino birth_weight

* variations
gen log_birthweight = log(birth_weight)
g bigbaby = (birth_weight >= 4000) if birth_weight != .
g lbw = (birth_weight < 2500) if birth_weight != .

* Baby length stay *
rename LAPSEN_LAHTOPVM dischargedate_bb
* Fix: this step used to re-apply %td to mother_discharge_date, which already
* carries that format, so the baby's discharge date was never formatted.
* Display format only; stored values are unchanged.
* NOTE(review): assumes dischargedate_bb is a daily date -- confirm.
format dischargedate_bb %td
* Diagnostic count only: rows where the baby's and the mother's discharge dates are both missing
count if dischargedate_bb == . &  ( mother_discharge_date == dischargedate_bb)
g lengthstay_baby = dischargedate_bb - child_b_date


* Restrict to births from 1987 onwards (rows with a missing birth year are kept)
drop if child_b_year<1987

save "$processed_data\clean_births.dta", replace

}


*********************************************  
*** 2.CLEAN LABOR DATA
*********************************************
{

* 2.1. FATHERS *

* Fathers: clean the three FOLK basic-data files (1987-2000, 2001-2010,
* 2011-2019) with one shared recipe, then stack them into a long person-year file.
*
* Fix: the 2011-2019 pass used to skip the amas1 -> occupational_status step
* (and drop it from the keep list), although the earlier fathers passes and all
* three mothers passes include it. Fathers therefore had no occupational status
* after 2010. All periods now run the same steps.

local folk_8800 "D:\ready-made\FOLK_perus_8800a\folk_19872000_tua_perus20tot_2.dta"
local folk_0110 "D:\ready-made\FOLK_perus_0110a\folk_20012010_tua_perus20tot_2.dta"
local folk_1119 "D:\ready-made\FOLK_perus_11a\folk_20112019_tua_perus20tot_2.dta"

foreach period in 8800 0110 1119 {
	use "`folk_`period''", clear
	keep if sukup=="1" // males

	rename tyotu_k income

	* Occupational status: empty strings become missing before destring
	replace amas1="." if amas1==""
	destring amas1, replace
	rename amas1 occupational_status
	lab define occupation 1 "Employed" 2 "Self-employed"
	lab values occupational_status occupation

	rename tyela unemp
	rename osela parttime
	rename lkm_k nchild
	rename kturaha_k totalincome
	rename ika age

	* Activity: code 11 -> 1, code 12 -> 0, every other code -> missing
	rename ptoim1 activity
	destring activity, replace
	replace activity=1 if activity==11
	replace activity=0 if activity==12
	replace activity=. if activity!=0 & activity!=1

	* Marital status dummies (0 when sivs holds any other value, including empty)
	g divorced = (sivs == "4")
	g unmarried = (sivs == "1")
	g married = (sivs == "2")
	g widowed = (sivs=="5")

	rename tyke unemp_mo
	rename tkela disable
	rename kunta municipality

	* "occupation" is a variable abbreviation of occupational_status here
	keep age vuosi shnro income ututku_aste occupation unemp ///
	parttime nchild totalincome activity unemp_mo divorced unmarried married widowed disable municipality

	save "$processed_data/fathers_`period'",replace
}

* Append all years
use "$processed_data/fathers_8800", clear
append using "$processed_data/fathers_0110"
append using "$processed_data/fathers_1119"
rename shnro shnro_father

compress
save "$processed_data\fathers1988_2019", replace

** Reshape the long fathers file to one row per father, one column set per year
* NOTE(review): "occupation" is passed as a stub although the long variable is
* named occupational_status -- confirm reshape resolves it as intended.
local yearly_vars age income ututku_aste occupation unemp ///
	parttime nchild totalincome activity unemp_mo divorced unmarried married widowed disable municipality
reshape wide `yearly_vars', i(shnro_father) j(vuosi)

save "$processed_data/fathers1988_2019_wide.dta",replace

* Clean data on company ownership and changing jobs (public sector working)
use "D:\ready-made\FOLK_jaksot_a\tyosuhde_1.dta", clear
append using "D:\ready-made\FOLK_jaksot_a\tyosuhde_2.dta", force

* public = 1 when the owner-type code is "2" or "3", 0 otherwise
g public = inlist(oyr_omist_tyyppi, "2", "3")
rename syrtun enterprise
rename sykstun establishment
keep vuosi shnro public enterprise establishment
* One record per person-year; which duplicate survives depends on the current row order
duplicates drop vuosi shnro ,force
reshape wide  public enterprise establishment, i(shnro) j(vuosi)
save "$processed_data/ownership1988_2015_wide.dta",replace

** Merge ownership file
* The wide ownership data just saved are still in memory, so they are not reloaded.
* NOTE(review): ownership-only rows (merge_ownership == 1) stay in the fathers file.
rename shnro shnro_father
merge 1:1 shnro_father using "$processed_data/fathers1988_2019_wide.dta", generate(merge_ownership)
compress
save "$processed_data/fathers1988_2019_wide.dta",replace

* Remove the per-period intermediate files
foreach period in 8800 0110 1119 {
	erase "$processed_data/fathers_`period'.dta"
}

***************************************************************
* 2.2. MOTHERS *

* Mothers: the three FOLK basic-data files (87-2000, 2001-2010, 2011-2019) go
* through the same cleaning steps, so they are handled in one loop.

local folk_8800 "D:\ready-made\FOLK_perus_8800a\folk_19872000_tua_perus20tot_2.dta"
local folk_0110 "D:\ready-made\FOLK_perus_0110a\folk_20012010_tua_perus20tot_2.dta"
local folk_1119 "D:\ready-made\FOLK_perus_11a\folk_20112019_tua_perus20tot_2.dta"

* Year-varying variables, in the order they are passed to reshape below
local yearly_vars age income ututku_aste occupation unemp ///
	parttime nchild totalincome activity unemp_mo divorced unmarried married widowed disable ///
	married_nokids married_kids mother_kids father_kids coh_commonkids coh_nocommonkids coh_nokids

* Family-type dummies, listed so that position k corresponds to pety == "k"
local family_types married_nokids married_kids mother_kids father_kids ///
	coh_commonkids coh_nocommonkids coh_nokids

foreach period in 8800 0110 1119 {
	use "`folk_`period''", clear
	keep if sukup=="2" // females

	rename tyotu_k income

	* Occupational status: empty strings become missing before destring
	replace amas1="." if amas1==""
	destring amas1, replace
	rename amas1 occupational_status
	lab define occupation 1 "Employed" 2 "Self-employed"
	lab values occupational_status occupation

	rename tyela unemp
	rename osela parttime
	rename lkm_k nchild
	rename kturaha_k totalincome
	rename ika age

	* Activity: code 11 -> 1, code 12 -> 0, every other code -> missing
	rename ptoim1 activity
	destring activity, replace
	replace activity=1 if activity==11
	replace activity=0 if activity==12
	replace activity=. if activity!=0 & activity!=1

	* Marital status dummies
	g divorced = (sivs == "4")
	g unmarried = (sivs == "1")
	g married = (sivs == "2")
	g widowed = (sivs=="5")

	* Family-type dummies from pety codes "1" to "7"
	local code = 0
	foreach ftype of local family_types {
		local ++code
		g `ftype' = (pety == "`code'")
	}

	rename tyke unemp_mo
	rename tkela disable

	* "occupation" is a variable abbreviation of occupational_status here
	keep vuosi shnro `yearly_vars'

	save "$processed_data/mothers_`period'",replace
}

* Append all labor files
use "$processed_data/mothers_8800", clear
append using "$processed_data/mothers_0110"
append using "$processed_data/mothers_1119"
rename shnro shnro_mother

** Reshape to one row per mother
* NOTE(review): "occupation" is passed as a stub although the long variable is
* named occupational_status -- confirm reshape resolves it as intended.
reshape wide `yearly_vars', i(shnro_mother) j(vuosi)

save "$processed_data/mothers1988_2019_wide.dta",replace

* Merge ownership file
* NOTE(review): ownership-only rows (merge_ownership == 1) stay in the mothers file.
use "$processed_data/ownership1988_2015_wide.dta", clear
rename shnro shnro_mother
merge 1:1 shnro_mother using "$processed_data/mothers1988_2019_wide.dta", generate(merge_ownership)
compress
save "$processed_data/mothers1988_2019_wide.dta",replace

* Remove intermediate files (the ownership file is no longer needed after this point)
foreach period in 8800 0110 1119 {
	erase "$processed_data/mothers_`period'.dta"
}
erase "$processed_data/ownership1988_2015_wide.dta"

}

*********************************************  
*** 3. MATCH FATHERS WITH CHILDREN
*********************************************

{
* Cleaning to match fathers with children *
* The mother's registered spouse in the child's birth year is taken as the father.

* First we need child birth dates: mother id and birth year of every registry birth
use "$processed_data\clean_births.dta", clear
rename aiti_shnro shnro
keep shnro child_b_year
save "$processed_data\birthdates_sample",replace

* Spouse identifiers per person-year
** 87-2000 and 2001-2010 are stored as intermediate files spouse1 / spouse2
local aslii_1 "D:\ready-made\FOLK_aslii_8800a\folk_19872000_tua_aslii20tot_3.dta"
local aslii_2 "D:\ready-made\FOLK_aslii_0110a\folk_20012010_tua_aslii20tot_3.dta"
forvalues k = 1/2 {
	use "`aslii_`k''", clear
	keep shnro spuhnro vuosi
	save "$processed_data\spouse`k'", replace
}

** 2011-2019, with the two earlier periods appended
use "D:\ready-made\FOLK_aslii_11a\folk_20112019_tua_aslii20tot_3.dta", clear
keep shnro spuhnro vuosi
append using "$processed_data\spouse1"
append using "$processed_data\spouse2"

* Person-years that occur more than once are removed entirely (no copy is kept)
bys shnro vuosi : keep if _N == 1
reshape wide spuhnro, i(shnro) j(vuosi)

save "$processed_data\spouse_id", replace

* merge spouses with mother and baby ids
* (the spouse data just saved are still in memory, so they are not reloaded)
merge 1:m shnro using "$processed_data\birthdates_sample.dta"
* Drop persons with spouse records but no birth in the registry
drop if _merge==1
drop _merge

* Father = spouse id recorded in the child's birth year
* NOTE(review): only birth years 1988-2016 are filled; births in other years get an empty father id -- confirm the range is intended.
gen shnro_father=""
forvalues y = 1988/2016 {
	replace shnro_father=spuhnro`y' if child_b_year==`y'
}

rename shnro shnro_mother

keep shnro_father shnro_mother child_b_year
* One row per mother and birth year
duplicates drop child_b_year shnro_mother, force
save "$processed_data\spouse_match.dta",replace


* Clean dad's birth year
** Derived from the cleaned long labor file as observation year minus age
use "$processed_data\fathers1988_2019.dta", clear
g father_b_year = vuosi - age
keep father_b_year shnro_father
* One row per father: the first row in the current order is the one kept
duplicates drop shnro_father, force
save "$processed_data/dadidbirthyear", replace

** Attach the father's birth year to the mother-child-year matches
use "$processed_data\spouse_match.dta", clear
merge m:1 shnro_father using "$processed_data/dadidbirthyear", keep(match)
drop _merge

* The lookup file is overwritten with the matched mother / father / birth-year rows
save "$processed_data/dadidbirthyear", replace

* Remove the intermediate spouse files
erase "$processed_data\spouse1.dta"
erase "$processed_data\spouse2.dta"
*rm "$processed_data\birthdates_sample.dta"

}

*********************************************  
*** 4.CLEAN HOSPITALIZATIONS
*********************************************

{
    
* Create dataset of children
** Child and mother identifiers from the cleaned birth data
use "$processed_data\clean_births.dta",clear
keep aiti_shnro lapsi_shnro
ren lapsi_shnro shnro
save "$processed_data\shnrochild.dta" , replace

set trace off
* Hospitalizations 1987-1993
* (the child list just saved is still in memory, so it is not reloaded)
duplicates drop shnro, force
* Keep every child; hospital records that match no child are discarded
merge 1:m shnro using "D:\b44\custom-made\vos9093_shnro", keep(master match)

** drop pregnancies (ICD9 codes whose first two characters are 63-67)
rename pdg ICD9
drop if inlist(substr(ICD9,1,2), "63", "64", "65", "66", "67")

** keep hospital admissions only: stays of at least one day
g lengthstay = lpvm - tupva
g inphospital = (lengthstay >= 1 & lengthstay != .)

** keep only first hospitalization:
keep shnro tupva inphospital aiti_shnro ICD9 lengthstay
keep if inphospital==1
count
* Nothing is saved when no admission remains
if r(N)>0{
	** keep only first hospitalization observed in data (earliest admission date per child)
	bys shnro (tupva): keep if _n == 1
	rename tupva tupva_inphospital
	drop if aiti_shnro == ""
	* In the output, shnro holds the mother's id and lapsi_shnro the child's id
	ren shnro lapsi_shnro
	ren aiti_shnro shnro
	save "$processed_data\d_inphospital8793child.dta",replace
}


* Hospitalizations 94-02 and 03-10
* Same recipe for both files; the period code is used directly in the file names.
foreach y in 9402 0310{
	use "$processed_data\shnrochild.dta", clear
	duplicates drop shnro, force
	* Keep every child; hospital records that match no child are discarded
	merge 1:m shnro using  "D:\b44\custom-made\vos`y'_shnro", keep(master match)

	** drop hospitalizations with pregnancies (ICD9 63-67, ICD10 chapter O)
	rename pdg ICD9
	rename pdgo ICD10
	drop if inlist(substr(ICD9,1,2), "63", "64", "65", "66", "67")
	drop if substr(ICD10,1,1) == "O"

	** keep hospital admissions only: stays of at least one day
	g lengthstay = lpvm - tupva
	g inphospital = (lengthstay >= 1 & lengthstay != .)

	** keep first hospitalization
	keep shnro tupva inphospital aiti_shnro ICD9 ICD10 lengthstay
	keep if inphospital== 1
	count
	* Nothing is saved when no admission remains
	if r(N)>0{
		* Earliest admission date per child
		bys shnro (tupva): keep if _n == 1
		rename tupva tupva_inphospital
		drop if aiti_shnro == ""
		* In the output, shnro holds the mother's id and lapsi_shnro the child's id
		ren shnro lapsi_shnro
		ren aiti_shnro shnro
		save "$processed_data\d_inphospital`y'child.dta",replace
	}
}


* Hospitalizations from 2011 - 2013 (one yearly file each)
foreach y in 2011 2012 2013{

	use "$processed_data\shnrochild.dta", clear
	duplicates drop shnro, force
	* Keep every child; hospital records that match no child are discarded
	merge 1:m shnro using  "D:\b44\custom-made\fleed_hilmo_`y'_shnro", keep(master match)

	** drop pregnancies (ICD10 chapter O)
	rename pdgo ICD10
	drop if substr(ICD10,1,1) == "O"

	** define hospitalizations only
	* Admission and discharge dates are parsed from day-month-year strings
	gen tupva=date(tulopv, "DMY")
	gen lpvm=date(lahtopv, "DMY")
	format tupva lpvm %td

	* Stays of at least one day count as inpatient
	g lengthstay = lpvm - tupva
	g inphospital = (lengthstay >= 1 & lengthstay != .)

	** keep first hospitalization
	keep shnro tupva inphospital aiti_shnro ICD10 lengthstay
	keep if inphospital==1
	count
	* Nothing is saved when no admission remains
	if r(N)>0{
		* Earliest admission date per child
		bys shnro (tupva): keep if _n == 1
		rename tupva tupva_inphospital
		drop if aiti_shnro == ""
		* In the output, shnro holds the mother's id and lapsi_shnro the child's id
		ren shnro lapsi_shnro
		ren aiti_shnro shnro
		save "$processed_data\d_inphospital`y'child.dta",replace
	}
}


* Hospitalizations from 2014 - 2017
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
* Keep every child; hospital records that match no child are discarded
merge 1:m shnro using  "D:\b44\custom-made\hilmo_2014_2017_shnro", keep(master match)

** drop pregnancies (ICD10 chapter O)
rename pdgo ICD10
drop if substr(ICD10,1,1) == "O"

** keep hospitalizations only
* Admission and discharge are converted from datetimes to daily dates
gen tupva1 = dofc(tupva)
gen lpvm1 = dofc(lpvm)
format tupva1 lpvm1 %td

* Stays of at least one day count as inpatient
g lengthstay = lpvm1 - tupva1
g inphospital = (lengthstay >= 1 & lengthstay != .)

** keep first hospitalization
* The daily admission date replaces the original datetime under the name tupva
keep shnro tupva1 inphospital aiti_shnro ICD10 lengthstay
ren tupva1 tupva
keep if inphospital==1
count
* Nothing is saved when no admission remains
if r(N)>0{
	* Earliest admission date per child
	bys shnro (tupva): keep if _n == 1
	rename tupva tupva_inphospital
	drop if aiti_shnro == ""
	* In the output, shnro holds the mother's id and lapsi_shnro the child's id
	ren shnro lapsi_shnro
	ren aiti_shnro shnro
	save "$processed_data\d_inphospital1417child.dta",replace
}

}


*********************************************  
* 5. CREATE DATASET OF UNIQUE HOSPITALIZATIONS
** One observation per mother (or father) based on the first child hospitalization
*********************************************

{

* Import birth data
use "$processed_data\clean_births.dta", clear
* Children whose id / birth-year combination occurs more than once are removed entirely
bys lapsi_shnro child_b_year : keep if _N == 1
ren aiti_shnro shnro_mother

* Merge overall data on births with datasets containing date of admission and dummy for hospitalization
** tupva: date of admission -- the hospitalization files hold only the first
** hospitalization of each child within their period
** Periods: 87-93, 94-02, 03-10, 2011, 2012, 2013, 2014-2017
* With the update option only missing values are filled, so values from an
* earlier period are not overwritten by a later one.
* NOTE(review): ICD9 and ICD10 can therefore come from different stays than tupva_inphospital -- confirm this is acceptable.
foreach range in 8793 9402 0310 2011 2012 2013 1417 {
	merge 1:1 lapsi_shnro using ///
		"$processed_data\d_inphospital`range'child.dta", update
	drop if _merge == 2
	drop _merge
}
** Change missings to 0
replace inphospital=0 if inphospital==.

* Generate age of first diagnosis
* NOTE(review): tabulating a daily date can hit tabulate's row limit in some Stata editions -- confirm it runs in the target environment.
tab tupva_inphospital,m
cap drop age_firstinphospital
gen age_firstinphospital = (tupva_inphospital - child_b_date)/365 // in years
su age_firstinphospital

* Generate first diagnosis per mother (if they have several kids diagnosed)
bys shnro_mother : egen first_inphospital=min(tupva_inphospital)
format first_inphospital %td
bys shnro_mother : egen agefirst_diag = min(age_firstinphospital)

* Keep one obs (child) per mother: the child whose admission is the mother's earliest
keep if first_inphospital == tupva_inphospital & tupva_inphospital != .

** remaining duplicates are caused by siblings with simultaneous shocks.
*** Mothers with more than one such child are removed entirely
bys shnro_mother (child_b_date) : drop if _N > 1
duplicates report shnro_mother

* Save analysis data set
keep shnro_mother lapsi_shnro first_inphospital tupva_inphospital inphospital child_b_date ///
child_b_year mother_b_year finnish single married mother_age unemployed ///
lowskilled_whitecollar highskilled_whitecollar selfemployed manualworkers ///
 male gestation_weeks log_birthweight ICD9 ICD10 first_birth

* Clean variables
** Age at time of event (recomputed because the keep above dropped the earlier copy)
gen age_firstinphospital = (first_inphospital - child_b_date)/365 // in years
** Birthweight is recovered from its log, which is the version kept above
gen birth_weight = exp(log_birthweight)
** Calendar year of the first hospitalization and mother's age in that year
g yearinp=year(first_inphospital)
gen age_mother_inp=yearinp - mother_b_year

* Variable labels
label var age_firstinphospital "Age at event time"
label var gestation_weeks "Gestational weeks"
label var birth_weight "Birthweight"
label var log_birthweight "Log Birthweight"
label var age_mother_inp "Age mother at admission"
label var single "Single"
label var married "Married"
label var highskilled_whitecollar "High-skilled white collar"
label var lowskilled_whitecollar "Low-skilled white collar"
label var manualworkers "Manual workers"
label var selfemployed "Self-employed"

save "$processed_data\date_diagnosis.dta", replace

* merge with father birth year
* Rows that exist only in the father lookup (no health shock) are not kept;
* shocks without a matched father stay in, with missing father fields.
use "$processed_data/dadidbirthyear", clear
merge 1:1 shnro_mother child_b_year using "$processed_data\date_diagnosis.dta", keep(match using)
keep shnro_mother lapsi_shnro first_inphospital child_b_year mother_b_year shnro_father father_b_year male
drop if first_inphospital == .
save "$processed_data\matched.dta",replace

}

*********************************************  
* 6.CLEAN MORTALITY
** Clean death certificates for the sample of children observed in the birth data 
*********************************************

{
    
* Causes of death registry: three source files, one shared recipe
* Tags name the output files: 8714 (through 2014), 1517 (2015 to 2017), 18 (2018).
* Only the source file, the layout of the death-date string and the output name differ.
local src_8714 "D:\b44\custom-made\kuolinsyyt_tot_u0681_al4"
local src_1517 "D:\b44\custom-made\kuolinsyyt_20152017"
local src_18   "D:\b44\custom-made\kuolinsyyt_2018"

local fmt_8714 "YMD"
local fmt_1517 "DMY"
local fmt_18   "DMY"

* Output paths are built up front so that no backslash ends up directly in front of a macro
local out_8714 "$processed_data\8714child.dta"
local out_1517 "$processed_data\1517child.dta"
local out_18   "$processed_data\18child.dta"

foreach tag in 8714 1517 18 {
	use "$processed_data\shnrochild.dta", clear
	duplicates drop shnro, force
	* Keep every child; death records that match no child are discarded
	merge 1:1 shnro using "`src_`tag''", keep(master match) nogenerate
	rename kvuosi yearmortality

	* Keep deaths from 1990 onwards for children with a known mother
	keep if yearmortality>1989 & yearmortality != . & aiti_shnro !=""

	* In the output, shnro holds the mother's id and lapsi_shnro the child's id
	ren shnro lapsi_shnro
	ren aiti_shnro shnro

	gen date_mortality= date(kuolpvm,"`fmt_`tag''")
	format date_mortality %td
	save "`out_`tag''",replace
}

}


*********************************************  
* 7. CREATE DATASET OF UNIQUE FATAL SHOCKS
** One observation per mother (or father) based on the first child mortality shock in the family
*********************************************
{
* Load birth data
use "$processed_data\clean_births.dta", clear
* Children whose id / birth-year combination occurs more than once are removed entirely
bys lapsi_shnro child_b_year : keep if _N == 1
* The mortality files store the mother's id as shnro, so the same name is used during the merges
ren aiti_shnro shnro

* Merge child mortality data (87-2014, 15-17, 18)
* With the update option only missing values are filled from each file.
foreach deaths in "$processed_data\8714child.dta" "$processed_data\1517child.dta" "$processed_data\18child.dta" {
	merge 1:1 lapsi_shnro using "`deaths'", update
	drop if _merge==2
	drop _merge
}
ren shnro shnro_mother

* gen first mortality per mother (if several of her children died)
bys shnro_mother : egen first_mortality=min(date_mortality)

* keep one obs per mother: the child whose death is the mother's earliest;
* mothers with more than one such child are removed entirely
keep if first_mortality == date_mortality & date_mortality != .
bys shnro_mother (child_b_date) : drop if _N > 1

* Subset variables
keep shnro_mother first_mortality date_mortality mortality child_b_date ///
child_b_year mother_b_year finnish single married mother_age unemployed ///
lowskilled_whitecollar highskilled_whitecollar selfemployed manualworkers ///
male gestation_weeks log_birthweight  first_birth tpksaika


* Clean variables
** Age at time of event
gen age_firstmortality = (first_mortality - child_b_date)/365 // in years
** Birthweight is recovered from its log, which is the version kept above
gen birth_weight = exp(log_birthweight)
** Calendar year of the child's death and mother's age in that year
** (variable names follow the hospitalization dataset)
g yearinp=year(first_mortality)
gen age_mother_inp=yearinp - mother_b_year

* Variable labels
* NOTE(review): age_mother_inp keeps the "at admission" label although the event here is a death -- confirm.
label var age_firstmortality "Age at event time"
label var gestation_weeks "Gestational weeks"
label var birth_weight "Birthweight"
label var log_birthweight "Log Birthweight"
label var age_mother_inp "Age mother at admission"
label var single "Single"
label var married "Married"
label var highskilled_whitecollar "High-skilled white collar"
label var lowskilled_whitecollar "Low-skilled white collar"
label var manualworkers "Manual workers"
label var selfemployed "Self-employed"

save "$processed_data\date_mortality.dta", replace

* merge with father birth year
* Rows that exist only in the father lookup (no fatal shock) are not kept;
* shocks without a matched father stay in, with missing father fields.
use "$processed_data/dadidbirthyear", clear
merge 1:1 shnro_mother child_b_year using "$processed_data\date_mortality.dta", keep(match using)
keep shnro_mother first_mortality child_b_year mother_b_year shnro_father father_b_year male
drop if first_mortality == .
save "$processed_data\matched_mortality.dta",replace
 
}

*********************************************  
*** 8.CLEAN MENTAL HEALTH
*********************************************

{
* Inpatient visits
** 1987-1993
use "D:\b44\custom-made\vos9093_shnro", clear

* Flag records whose main diagnosis (pdg) or any SDG* diagnosis starts with
* 29, 30 or 31, then keep only the flagged records
gen keep = .
foreach var of varlist pdg SDG* {
	replace keep = 1 if inlist(substr(`var',1,2), "29", "30", "31")
}
keep if keep==1
save "$processed_data\mh_8793.dta", replace // all mh diagnoses

** 94-2002 and 2003 - 2010: same selection applied to both files
foreach y in 9402 0310 {
	use "D:\b44\custom-made\vos`y'_shnro", clear
	gen keep=.

	* Diagnosis fields starting with "F"
	foreach var of varlist pdgo pdge SDG* {
		replace keep=1 if substr(`var',1,1)=="F"
	}

	* Diagnosis fields starting with 29, 30 or 31
	foreach var of varlist pdg SDG1 SDG2 {
		replace keep=1 if inlist(substr(`var',1,2), "29", "30", "31")
	}

	keep if keep==1
	save "$processed_data\mh_`y'.dta",replace // all mh diagnoses
}

** 2011 TO 2013 (LOOP)
foreach y in  2011 2012 2013{
	use "D:\b44\custom-made\fleed_hilmo_`y'_shnro", clear
	g keep=.
	foreach var of varlist pdgo pdge sdg*{
		# delimit;
		replace keep=1 if substr(`var',1,1)=="F"  ;
		# delimit cr
	}
	keep if keep==1
	rename tulopv tupva 
	gen tupva2=date(tupva, "DMY")
	format tupva2 %td
	drop tupva
	rename tupva2 tupva
	save "$processed_data\mh_`y'.dta",replace // all mh diagnoses
}

** 2014 - 2017 
use "D:\b44\custom-made\hilmo_2014_2017_shnro", clear
g keep=.
	foreach var of varlist pdgo pdge SDG*{
	# delimit;
	replace keep=1 if substr(`var',1,1)=="F"  ;
	# delimit cr
}
keep if keep==1

gen tupva1 = dofc(tupva)
format tupva1 %td
drop tupva
rename tupva1 tupva
save "$processed_data\mh_1417.dta",replace // all mh diagnoses


* Outpatient visits
* A record is kept when any listed diagnosis field starts with "F".
* Every saved file carries outpatient = 1.

** 1998-2007 (only includes those years despite file name)
use "D:\b44\custom-made\eshavo_1998_2010_shnro.dta", clear

generate keep = .
foreach dx of varlist pdgo pdge SDG1O SDG1E SDG2O SDG2E SDG3O SDG3E {
	replace keep = 1 if substr(`dx', 1, 1) == "F"
}
keep if keep == 1
generate year = year(tupva)
generate outpatient = 1

*** 2003-2007 (written from a preserved copy so 1998-2002 can follow)
preserve
keep if inrange(year, 2003, 2007)
save "$processed_data\mh_0307outpatient.dta", replace // all mh diagnoses
restore

*** 1998-2002 (the data in memory is replaced by the next -use-)
keep if inrange(year, 1998, 2002)
save "$processed_data\mh_9802outpatient.dta", replace // all mh diagnoses

** 2008-2010
use "D:\b44\custom-made\eshavo_2008_2010_shnro", clear

generate keep = .
foreach dx of varlist pdgo pdge SDG1O SDG1E SDG2O SDG2E SDG3O SDG3E {
	replace keep = 1 if substr(`dx', 1, 1) == "F"
}
keep if keep == 1

generate outpatient = 1
save "$processed_data\mh_0810outpatient.dta", replace // all mh diagnoses


** 2014-2017
*** NOTE(review): only pdgo pdge SDG1O SDG1E SDG2O are scanned here, fewer
*** fields than in the earlier outpatient files - confirm this is intended.
use "D:\b44\custom-made\eshavo_2014_2017_shnro", clear
generate keep = .
foreach dx of varlist pdgo pdge SDG1O SDG1E SDG2O {
	replace keep = 1 if substr(`dx', 1, 1) == "F"
}
keep if keep == 1

*** convert tupva to a daily date under the same name
generate tupva1 = dofc(tupva)
format tupva1 %td
drop tupva
rename tupva1 tupva

generate outpatient = 1
save "$processed_data\mh_1417outpatient.dta", replace // all mh diagnoses
 
* append files
* Inpatient and outpatient extracts are first combined per period, then all
* periods are stacked into one file holding only the person ID and the date.

** inpatient and outpatient from 1998 to 2002
use "$processed_data\mh_9402.dta", clear
append using "$processed_data\mh_9802outpatient.dta", force
save "$processed_data\mh9402.dta", replace

** inpatient and outpatient from 2003 to 2010
use "$processed_data\mh_0310.dta", clear
append using "$processed_data\mh_0307outpatient.dta" "$processed_data\mh_0810outpatient.dta", force
save "$processed_data\mh0310.dta", replace

** inpatient and outpatient from 2014 to 2017
use "$processed_data\mh_1417.dta", clear
append using "$processed_data\mh_1417outpatient.dta", force
save "$processed_data\mh1417.dta", replace

** all years
use "$processed_data\mh_8793", clear

foreach y in 9402 0310 {
	append using "$processed_data\mh`y'", force
}
foreach y in 2011 2012 2013 {
	append using "$processed_data\mh_`y'", force
}
*** NOTE(review): this last append has no -force-, unlike the ones above.
append using "$processed_data\mh1417.dta"

*** the same ID is stored twice so the file can be used with either parent key
keep shnro tupva
generate shnro_mother = shnro
generate shnro_father = shnro

save "$processed_data\mh_allyears_onlydate.dta", replace

* Remove temp files not used in analysis
foreach file in mh_8793 mh_9402 mh_0310 mh_2011 mh_2012 mh_2013 mh_1417 mh_0307outpatient mh_9802outpatient mh_0810outpatient mh_1417outpatient mh9402 mh0310 mh1417 {
	rm "$processed_data\\`file'.dta"
}


* Count the number of mental health visits in a single year
* The same steps run once per parent key: count records per person and
* calendar year of tupva, keep one row per person-year, then reshape so each
* year becomes its own n_diag_year<year> column.
foreach parent in mother father {

	* output file suffix: mothers -> "mom", fathers -> "dad"
	local suffix = cond("`parent'" == "mother", "mom", "dad")

	use "$processed_data/mh_allyears_onlydate.dta", clear
	generate year_diagnosis = year(tupva)
	bysort shnro_`parent' year_diagnosis: egen n_diag_year = count(year_diagnosis)
	duplicates drop shnro_`parent' year_diagnosis, force
	drop if missing(shnro_`parent')

	keep shnro_`parent' year_diagnosis n_diag_year
	reshape wide n_diag_year, i(shnro_`parent') j(year_diagnosis)

	save "$processed_data/mentalhealth`suffix'.dta", replace
}

}

*********************************************  
*** 9.CLEAN PARENTAL HOSPITALIZATIONS
*********************************************

{

* Import data and define variables
* Every extract keeps stays of at least one day (discharge date minus
* admission date >= 1, non-missing) and saves the person ID, admission date,
* primary diagnosis code(s), length of stay and a constant flag inphospital = 1.

** Hospitalizations 1987-1993
use "D:\b44\custom-made\vos9093_shnro", clear

generate lengthstay = lpvm - tupva
keep if lengthstay >= 1 & !missing(lengthstay)
generate inphospital = 1

rename pdg ICD9

keep shnro tupva inphospital ICD9 lengthstay
save "$processed_data\d_inphospital8793.dta", replace

** Hospitalizations from 94-02 and 03-10
foreach y in 9402 0310 {

	use "D:\b44\custom-made\vos`y'_shnro", clear

	generate lengthstay = lpvm - tupva
	keep if lengthstay >= 1 & !missing(lengthstay)
	generate inphospital = 1

	* these files carry both coding variables
	rename pdg ICD9
	rename pdgo ICD10

	keep shnro tupva inphospital ICD9 ICD10 lengthstay
	save "$processed_data\d_inphospital`y'.dta", replace
}

** Hospitalizations from 2011 - 2013
foreach y in 2011 2012 2013 {

	use "D:\b44\custom-made\fleed_hilmo_`y'_shnro", clear

	* admission and discharge dates are "DMY" strings in these files
	generate tupva = date(tulopv, "DMY")
	format tupva %td

	generate lpvm = date(lahtopv, "DMY")
	format lpvm %td

	generate lengthstay = lpvm - tupva
	keep if lengthstay >= 1 & !missing(lengthstay)
	generate inphospital = 1
	rename pdgo ICD10

	keep shnro tupva inphospital ICD10 lengthstay
	save "$processed_data\d_inphospital`y'.dta", replace
}


** Hospitalizations from 2014 - 2017
use "D:\b44\custom-made\hilmo_2014_2017_shnro", clear

* dates are converted with dofc() into new daily-date variables
generate tupva1 = dofc(tupva)
format tupva1 %td

generate lpvm1 = dofc(lpvm)
format lpvm1 %td

generate lengthstay = lpvm1 - tupva1
keep if lengthstay >= 1 & !missing(lengthstay)
generate inphospital = 1
rename pdgo ICD10

* the converted admission date is saved under the usual name tupva
keep shnro tupva1 inphospital ICD10 lengthstay
rename tupva1 tupva
save "$processed_data\d_inphospital1417.dta", replace

}

*********************************************  
*** 10.CLEAN PARENTAL MORTALITY
*********************************************

{

* Import data and define variables
* Each stanza links the child ID list (one row per shnro) to one cause-of-death
* file, keeps rows with a death year after 1989, and builds a daily death date.
* Rows found only in the death file are discarded.

** Causes of death registry from 90 to 2014
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
merge 1:1 shnro using "D:\b44\custom-made\kuolinsyyt_tot_u0681_al4", ///
	keep(master match) nogenerate

rename kvuosi yearmortality
keep if yearmortality > 1989 & !missing(yearmortality)

* NOTE(review): unlike the 2015-2017 and 2018 stanzas, this one does not
* filter on aiti_shnro or rename it to shnro (both statements were commented
* out in the original) - confirm the difference is intended.
*keep if aiti_shnro !=""
rename shnro lapsi_shnro
*ren aiti_shnro shnro

* kuolpvm is parsed as year-month-day in this file
generate date_mortality = date(kuolpvm, "YMD")
format date_mortality %td
save "$processed_data\8714child.dta", replace

** Causes of death registry from 2015 to 2017
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
merge 1:1 shnro using "D:\b44\custom-made\kuolinsyyt_20152017", ///
	keep(master match) nogenerate

rename kvuosi yearmortality
keep if yearmortality > 1989 & !missing(yearmortality)

* keep rows with a mother ID; the child ID moves to lapsi_shnro and the
* mother ID becomes shnro
keep if aiti_shnro != ""
rename shnro lapsi_shnro
rename aiti_shnro shnro

* kuolpvm is parsed as day-month-year in this file
generate date_mortality = date(kuolpvm, "DMY")
format date_mortality %td
save "$processed_data\1517child.dta", replace

** Causes of death registry 2018
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
merge 1:1 shnro using "D:\b44\custom-made\kuolinsyyt_2018", ///
	keep(master match) nogenerate

rename kvuosi yearmortality
keep if yearmortality > 1989 & !missing(yearmortality)

keep if aiti_shnro != ""
rename shnro lapsi_shnro
rename aiti_shnro shnro

generate date_mortality = date(kuolpvm, "DMY")
format date_mortality %td
save "$processed_data\18child.dta", replace

}

*********************************************  
*** 11.CLEAN PARENTAL TRANSFERS
*********************************************

{
* CLEAN AND RENAME VARIABLES
* Each period file is reduced to year (vuosi), person ID (shnro) and the
* transfers amount (source variable saatusi).

** 87-2000
use "D:\ready-made\FOLK_tulo_8800a\folk_19872000_tua_tulo21tot_1.dta", clear
rename saatusi transfers
keep vuosi shnro transfers
save "$processed_data/transfers_8800", replace

** 2001-2010
use "D:\ready-made\FOLK_tulo_0110a\folk_20012010_tua_tulo21tot_1", clear
rename saatusi transfers
keep vuosi shnro transfers
save "$processed_data/transfers_0110", replace

** 2011 to 2019
use "D:\ready-made\FOLK_tulo_11a\folk_20112019_tua_tulo21tot_1.dta", clear
rename saatusi transfers
keep vuosi shnro transfers
save "$processed_data/transfers_1119", replace

* Append all files
* The stacked panel is written twice, keyed once as shnro_father and once as
* shnro_mother, so it can be merged onto either parent.
foreach parent in father mother {
	use "$processed_data/transfers_8800", clear
	append using "$processed_data/transfers_0110" "$processed_data/transfers_1119"
	rename shnro shnro_`parent'

	* negative amounts are set to zero
	replace transfers = 0 if transfers < 0

	* dup is 0 for a unique person-year, otherwise the row's rank within the
	* person-year; it is not referenced again in this section
	bysort shnro_`parent' vuosi: generate dup = cond(_N == 1, 0, _n)

	* one row per person, one transfers<year> column per year
	reshape wide transfers, i(shnro_`parent') j(vuosi)
	save "$processed_data/transfers_`parent's1988_2019_wide.dta", replace
}

* Remove files not used in analysis
foreach file in transfers_8800 transfers_0110 transfers_1119 {
	rm "$processed_data\\`file'.dta"
}

}

*********************************************  
*** 12.CLEAN PARENTAL ALLOWANCES
*********************************************

{
* CLEAN AND RENAME VARIABLES
* Each period file is reduced to year (vuosi), person ID (shnro) and the
* family allowance amount (source variable perav_ak).

** 87-2000
use "D:\ready-made\FOLK_askun_8700a\folk_19872000_tua_askun21tot_1.dta", clear
rename perav_ak family_allowance
keep vuosi shnro family_allowance
save "$processed_data/family_allowance_8800", replace

** 2001-2010
use "D:\ready-made\FOLK_askun_0110a\folk_20012010_tua_askun21tot_1", clear
rename perav_ak family_allowance
keep vuosi shnro family_allowance
save "$processed_data/family_allowance_0110", replace

** 2011 to 2019
use "D:\ready-made\FOLK_askun_11a\folk_20112019_tua_askun21tot_1.dta", clear
rename perav_ak family_allowance
keep vuosi shnro family_allowance
save "$processed_data/family_allowance_1119", replace

* Append all files
* The stacked panel is written twice, keyed once as shnro_father and once as
* shnro_mother, so it can be merged onto either parent.
foreach parent in father mother {
	use "$processed_data/family_allowance_8800", clear
	append using "$processed_data/family_allowance_0110" "$processed_data/family_allowance_1119"
	rename shnro shnro_`parent'

	* negative amounts are set to zero
	replace family_allowance = 0 if family_allowance < 0

	* dup is 0 for a unique person-year, otherwise the row's rank within the
	* person-year; it is not referenced again in this section
	bysort shnro_`parent' vuosi: generate dup = cond(_N == 1, 0, _n)

	* one row per person, one family_allowance<year> column per year
	reshape wide family_allowance, i(shnro_`parent') j(vuosi)
	save "$processed_data/family_allowance_`parent's1988_2019_wide.dta", replace
}

* Remove files not used in analysis
foreach file in family_allowance_8800 family_allowance_0110 family_allowance_1119 {
	rm "$processed_data\\`file'.dta"
}

}

*********************************************  
*** 13.CLEAN PARENTAL LOCATION
*********************************************

{
* Clean dataset of parents and grandparents
* Builds a lookup from a person's ID (shnro) to the IDs of that person's
* mother and father, renamed here to grandmother_id / grandfather_id, plus a
* family identifier shared by everyone with the same pair.

** Load parent-child identifiers
use "D:\ready-made\FOLK_laps_70a\folk_19702019_tua_laps20_1.dta", clear

** Tabulate duplicate IDs for inspection
duplicates tag shnro, gen(dup)
tab dup,m

** Drop observations with a missing child ID
*** The original note reports 72 such rows, which the code removed with
*** "drop if dup == 71". That literal only holds for one particular extract,
*** so the rows are now dropped on the missing ID itself.
*** NOTE(review): assumes the 72-row duplicate group is exactly the set of
*** rows with missing shnro, as the original comment states - confirm.
drop if missing(shnro)
drop dup

rename shnro_m grandmother_id
rename shnro_f grandfather_id

** identifier for same family: same grandparents
egen grandparents_id=group(grandmother_id grandfather_id)
save "$processed_data/grandparents_ids",replace


* Clean matched spouse data (DiD) 
* For each matched mother this block attaches the place of residence of the
* mother, her mother and her father, measured two years before the child's
* first hospitalization, and derives same-municipality / same-region flags.
use "$processed_data\matched.dta", clear

** Create variable for the year two years prior to the health shock
g yearinphospital = year(first_inphospital)
g yearinphospitalminus2 = yearinphospital - 2
* vuosi is the year key used by the residence files merged below
gen vuosi=yearinphospitalminus2

** Merge matched parents with grandparents on mother ID
rename shnro_mother shnro
merge 1:1 shnro using "$processed_data/grandparents_ids", ///
keep(1 3) keepusing(grandmother_id grandfather_id) nogen
rename shnro shnro_mother 

** Merge in municipality, locality and regional information
*** The same three-file lookup runs for the grandmother, the grandfather and
*** the mother. The person's ID is temporarily renamed to shnro (the key in
*** the residence files) and restored afterwards. The 1987-2000 file is merged
*** first; the later files use -update- so they only fill values that are
*** still missing. Rows found only in the residence files are dropped.
foreach person in grandmother grandfather mother {

	* ID variable holding this person's identifier
	local idvar = cond("`person'" == "mother", "shnro_mother", "`person'_id")
	rename `idvar' shnro

	*** 1987-2000
	merge m:1 shnro vuosi using "D:\ready-made\FOLK_perus_8800a\folk_19872000_tua_perus19tot_3", ///
	keep(1 3) keepusing(kunta taaja mkunta) nogen

	*** 2001-2010
	merge m:1 shnro vuosi using  "D:\ready-made\FOLK_perus_0110a\folk_20012010_tua_perus19tot_3", ///
	keepusing(kunta taaja mkunta) update 
	drop if _merge == 2
	drop _merge 

	*** 2011 to 2019
	merge m:1 shnro vuosi using  "D:\ready-made\FOLK_perus_11a\folk_20112019_tua_perus20tot_2", ///
	keepusing(kunta taaja mkunta) update 
	drop if _merge == 2
	drop _merge 

	** Clean merged variables 
	*** Labels are applied to the full variable names. The original
	*** grandmother stanza labelled "municipality" and "locality", which only
	*** resolved through variable abbreviation.
	rename shnro `idvar'
	rename mkunta region_`person'
	label var region_`person' "region of residence `person'"
	rename kunta municipality_`person'
	label var municipality_`person' "municipality of residence `person'"
	rename taaja locality_`person'
	label var locality_`person' "locality of residence `person'"
}

** Create indicator of mothers and at least one grandparent living in the same municipality 
*** Missing when the mother's municipality is missing or both grandparents' are.
gen mother_same_municpality = 0
replace mother_same_municpality = 1 if ///
inlist(municipality_mother, municipality_grandmother, municipality_grandfather)
replace mother_same_municpality = . if missing(municipality_mother) | (missing(municipality_grandmother) & missing(municipality_grandfather))
tab mother_same_municpality,m 


** Create variable of grandmother having the same municipality only
cap drop mother_same_municpality_gma 
gen mother_same_municpality_gma = 0
replace mother_same_municpality_gma = 1 if municipality_mother == municipality_grandmother
replace mother_same_municpality_gma = . if missing(municipality_mother) | missing(municipality_grandmother)
tab mother_same_municpality_gma,m 


** Create indicator of mothers and at least one grandparent living in the same region 
*** Missing when the mother's region is missing or both grandparents' are.
gen mother_same_region = 0
replace mother_same_region = 1 if ///
inlist(region_mother, region_grandmother, region_grandfather)
replace mother_same_region = . if missing(region_mother) | ///
(missing(region_grandmother) & missing(region_grandfather))
tab mother_same_region,m 


** Create variable of grandmother having the same region only
cap drop mother_same_region_gma 
gen mother_same_region_gma = 0
replace mother_same_region_gma = 1 if region_mother == region_grandmother
replace mother_same_region_gma = . if missing(region_mother) | missing(region_grandmother)
tab mother_same_region_gma,m 


save "$processed_data/location",replace

}

*********************************************  
*** 14. CREATE MEASURE OF CARE BURDEN 
*********************************************

{
*** 1. IMPORT DATA AND MERGE ***
* Each stanza links the child ID list (one row per shnro) to one inpatient
* file and keeps only matched rows, i.e. actual visits. Stays with a negative
* length and pregnancy-coded stays are removed. The saved file holds the child
* ID, mother ID, admission date (tupva_visit) and a constant visit = 1.

* Inpatient hospitalizations 1987-1993 
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
merge 1:m shnro using "D:\b44\custom-made\vos9093_shnro", keep(match) nogenerate

** Length of stay
generate lengthstay = lpvm - tupva
drop if lengthstay < 0

** Drop pregnancies (codes beginning 63-67)
rename pdg ICD9
drop if inlist(substr(ICD9, 1, 2), "63", "64", "65", "66", "67")

** Every remaining row is a visit
generate visit = 1

keep shnro aiti_shnro tupva visit
rename tupva tupva_visit
save "$processed_data\visits8793child.dta", replace

* Inpatient hospitalizations 1994-2002 and 2003-2010
foreach y in 9402 0310 {

	use "$processed_data\shnrochild.dta", clear
	duplicates drop shnro, force
	merge 1:m shnro using "D:\b44\custom-made\vos`y'_shnro", keep(match) nogenerate

	** Drop pregnancies under either coding variable
	rename pdg ICD9
	rename pdgo ICD10
	drop if inlist(substr(ICD9, 1, 2), "63", "64", "65", "66", "67")
	drop if substr(ICD10, 1, 1) == "O"

	** Length of stay
	generate lengthstay = lpvm - tupva
	drop if lengthstay < 0

	generate visit = 1

	keep shnro aiti_shnro tupva visit
	rename tupva tupva_visit

	save "$processed_data\visits`y'child.dta", replace
}

* Inpatient hospitalizations 2011, 2012, 2013
foreach y in 2011 2012 2013 {

	use "$processed_data\shnrochild.dta", clear
	duplicates drop shnro, force
	merge 1:m shnro using "D:\b44\custom-made\fleed_hilmo_`y'_shnro", keep(match) nogenerate

	** Length of stay (dates are "DMY" strings in these files)
	generate tupva = date(tulopv, "DMY")
	format tupva %td
	generate lpvm = date(lahtopv, "DMY")
	format lpvm %td
	generate lengthstay = lpvm - tupva
	drop if lengthstay < 0

	** Drop pregnancies
	rename pdgo ICD10
	drop if substr(ICD10, 1, 1) == "O"

	generate visit = 1

	keep shnro aiti_shnro tupva visit
	rename tupva tupva_visit
	save "$processed_data\visits`y'child.dta", replace
}

* Inpatient hospitalizations 2014-2017
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
merge 1:m shnro using "D:\b44\custom-made\hilmo_2014_2017_shnro", keep(match) nogenerate

** Length of stay (dates converted with dofc())
generate tupva1 = dofc(tupva)
format tupva1 %td
generate lpvm1 = dofc(lpvm)
format lpvm1 %td
generate lengthstay = lpvm1 - tupva1
drop if lengthstay < 0
drop tupva
rename tupva1 tupva

generate visit = 1

** Drop pregnancies
rename pdgo ICD10
drop if substr(ICD10, 1, 1) == "O"

keep shnro aiti_shnro tupva visit
rename tupva tupva_visit

save "$processed_data\visits1417child.dta", replace
 


*****************************

* Outpatient hospitalizations 1998-2007
* Each stanza links the child ID list (one row per shnro) to one outpatient
* file and keeps matched rows only. The saved file holds the child ID, mother
* ID, visit date (tupva_visit), visit = 1 and outpatient = 1.
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro , force
merge 1:m shnro using "D:\b44\custom-made\eshavo_1998_2010_shnro.dta"

** identify all visits
g visit = 1 if _merge == 3
replace visit = 0 if visit == .

** keep id, date of admission and dummy for visit
keep shnro aiti_shnro tupva visit
drop if visit == 0
ren tupva tupva_visit
* The year is taken from the renamed variable. The original called
* year(tupva), which only resolved through variable abbreviation.
g year= year(tupva_visit)
g outpatient = 1

	** 1998-2002
	preserve
	keep if year >= 1998 & year <= 2002
	save "$processed_data\visits9802outpatient.dta",replace 
	restore
	
	** 2003-2007
	keep if year >= 2003 & year <= 2007
	save "$processed_data\visits0307outpatient.dta",replace 
	
* Outpatient hospitalizations 2008-2010
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro , force
merge 1:m shnro using "D:\b44\custom-made\eshavo_2008_2010_shnro"

** identify all visits
g visit = 1 if _merge == 3
replace visit = 0 if visit == .

** keep id, date of admission and dummy for visit
keep shnro aiti_shnro tupva visit
drop if visit == 0
ren tupva tupva_visit
* The flag is created after -keep- so it is saved, as in the 1998-2007 files.
* The original created it before -keep-, which discarded it.
g outpatient = 1

save "$processed_data\visits0810outpatient.dta",replace 

* Outpatient hospitalizations 2014-2017
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro , force
merge 1:m shnro using "D:\b44\custom-made\eshavo_2014_2017_shnro" 

** Identify all visits
g visit = 1 if _merge == 3
replace visit = 0 if visit == .

* convert tupva to a daily date under the same name
gen tupva1 = dofc(tupva)
format tupva1 %td
drop tupva
ren tupva1 tupva

** keep id, date of admission and dummy for visit
keep shnro aiti_shnro tupva visit
drop if visit == 0
ren tupva tupva_visit
g outpatient = 1

save "$processed_data\visits1417outpatient.dta",replace 


** append files
*** Stack all inpatient and outpatient visit files into one list of visits
*** holding the child ID, mother ID and visit date.
use "$processed_data\visits8793child.dta", clear

*** Inpatient
append using ///
	"$processed_data\visits9402child.dta" ///
	"$processed_data\visits0310child.dta" ///
	"$processed_data\visits2011child.dta" ///
	"$processed_data\visits2012child.dta" ///
	"$processed_data\visits2013child.dta" ///
	"$processed_data\visits1417child.dta"

*** Outpatient
append using ///
	"$processed_data\visits9802outpatient.dta" ///
	"$processed_data\visits0307outpatient.dta" ///
	"$processed_data\visits0810outpatient.dta" ///
	"$processed_data\visits1417outpatient.dta"

keep shnro aiti_shnro tupva_visit
compress

*** mother ID -> shnro_mother, child ID -> lapsi_shnro; hosp_ID copies the child ID
rename (aiti_shnro shnro) (shnro_mother lapsi_shnro)
generate hosp_ID = lapsi_shnro
save "$processed_data\visits_onlydate.dta", replace


* CALCULATE N VISITS AFTER HEALTH SHOCK 
* Links the matched sample to the visit list, counts visits per mother and
* calendar year, aligns those counts to event time around the year of the
* first hospitalization, and splits the sample by above/below-mean visits in
* the shock year.
use "$processed_data\matched.dta", clear 
merge 1:m lapsi_shnro using "$processed_data\visits_onlydate.dta"

keep if _merge==3
drop _merge

g year_visit=year(tupva_visit)

** Count visits per year and child (mother) 
bys shnro_mother year_visit: egen n_visit_year=count(year_visit)

** Year first in hospital 
g yearinphospital=year(first_inphospital)

** Variables for number of visits years before and after health shock
keep shnro_mother child_b_year year_visit n_visit_year yearinphospital ///
first_inphospital mother_b_year father_b_year shnro_father
** One obs per mother and year of diagnosis
collapse (first) n_visit_year child_b_year yearinphospital first_inphospital ///
mother_b_year father_b_year shnro_father, by(shnro_mother year_visit)
** Put in wide format
reshape wide n_visit_year, i(shnro_mother) j(year_visit)
** substitute missings with 0's
foreach y of numlist 1990(1)2017{
	replace n_visit_year`y'=0 if n_visit_year`y'==.
}

* visitsyear<x>: visits in the x-th year after the shock year (x = 0 is the shock year)
forvalues x=0(1)10{
	g visitsyear`x'=.
}

* NOTE(review): the cutoff below stops at calendar year 2016 although
* n_visit_year2017 is zero-filled above, so shocks in 2017 get no year-0
* value - confirm whether 2016 is intended.
forvalues y=1990(1)2017{
	forvalues x= 0(1)10{
		local year=`y'+`x'
		if `y'<=(2016-`x'){
			replace visitsyear`x'=n_visit_year`year' if yearinphospital==`y'
		}
	}
}

* calculate  visits in x years before 
forvalues x=1(1)5{
	g visitsyearminus`x'=.
}

forvalues y=1990(1)2017{
	forvalues x= 1(1)5{
		local year=`y'-`x'
		if `year'>=1990{
			replace visitsyearminus`x'=n_visit_year`year' if yearinphospital==`y'
		}
	}
}

* High burden = more visits in the shock year than the sample mean.
* Stata treats a missing value as larger than any number, so the original
* "visitsyear0 > r(mean)" marked every mother without a year-0 count as high
* burden. Those mothers now get a missing flag and enter neither subsample.
sum visitsyear0
g high_burden = (visitsyear0 > r(mean)) if !missing(visitsyear0)
tab high_burden ,m 

* high burden
preserve
keep if high_burden==1
keep shnro_mother first_inphospital child_b_year n_visit_year* ///
high_burden yearinphospital mother_b_year father_b_year shnro_father
keep if first_inphospital!=.
save "$processed_data\matched_h_mean.dta",replace
restore

* low burden
preserve
keep if high_burden==0
keep shnro_mother first_inphospital child_b_year n_visit_year* ///
high_burden yearinphospital mother_b_year father_b_year shnro_father
keep if first_inphospital!=.
save "$processed_data\matched_l_mean.dta",replace
restore

* Remove files not used in analysis
local files_to_delete "visits8793child visits9402child visits0310child visits2011child visits2012child visits2013child visits1417child visits9802outpatient visits0307outpatient visits0810outpatient visits1417outpatient"
foreach file of local files_to_delete {
	cap rm "$processed_data\\`file'.dta"
}

}

*********************************************  
*** 15. CREATE MATCHED SAMPLE FOR CANCER AND Skin 
**** Hospitalizations
*********************************************

{
* Prepare sample
* Keeps families with a first child hospitalization and classifies the
* recorded diagnosis into cancer / skin indicators and chapter-level groups.
use "$processed_data/dadidbirthyear", clear
merge 1:1 shnro_mother child_b_year using "$processed_data\date_diagnosis.dta"
* drop if no health shock
drop if _merge == 1
keep shnro_mother first_inphospital child_b_year mother_b_year ///
shnro_father father_b_year male ICD10 ICD9
keep if first_inphospital!=.

* Generate cancer and skin subgroups 
g newdiag10 = substr(ICD10,1,4)
g newdiag9 = substr(ICD9,1,4)

* ICD-10 neoplasms span C00-D48. The original tested only D1-D4 and so left
* out D00-D09 (in situ neoplasms); D0 is now included.
g cancer = 1 if substr(newdiag10, 1,1) == "C" | ///
	inlist(substr(newdiag10, 1,2), "D0", "D1", "D2", "D3", "D4")

* ICD-9 codes beginning 14-23 (two inlist calls because of the string
* argument limit)
replace cancer = 1 if ///
	inlist(substr(newdiag9, 1,2), "14", "15", "16", "17", "18") | ///
	inlist(substr(newdiag9, 1,2), "19", "20", "21", "22", "23")

g skin = 1 if substr(newdiag10,1,1) == "L"


* Generate high-level subgroups
* Groups follow the first letter of the ICD-10 code. D and H are split on the
* second character: D0-D4 neoplasms vs other D (blood), H0-H5 eye vs other H
* (ear).
g grouponly = substr(ICD10,1,2) 
g groups = 1 if substr(grouponly,1,1) == "A" | substr(grouponly,1,1) == "B" 
replace groups = 2 if substr(grouponly,1,1) == "C"
* D0 added: the original sent D00-D09 to group 3 (Blood)
replace groups = 2 if inlist(grouponly, "D0", "D1", "D2", "D3", "D4")
replace groups = 3 if substr(grouponly,1,1) == "D" & groups != 2
replace groups = 4 if substr(grouponly,1,1) == "E"
replace groups = 5 if substr(grouponly,1,1) == "F"
replace groups = 6 if substr(grouponly,1,1) == "G"
* H0 added: the original sent H00-H09 to group 8 (Ear)
replace groups = 7 if inlist(grouponly, "H0", "H1", "H2", "H3", "H4", "H5")
replace groups = 8 if substr(grouponly,1,1) == "H" & groups != 7
replace groups = 9 if substr(grouponly,1,1) == "I"
replace groups = 10 if substr(grouponly,1,1) == "J"
replace groups = 11 if substr(grouponly,1,1) == "K"
replace groups = 12 if substr(grouponly,1,1) == "L"
replace groups = 13 if substr(grouponly,1,1) == "M"
replace groups = 14 if substr(grouponly,1,1) == "N"
replace groups = 15 if substr(grouponly,1,1) == "O"
replace groups = 17 if substr(grouponly,1,1) == "Q"
replace groups = 18 if substr(grouponly,1,1) == "R"
replace groups = 19 if substr(grouponly,1,1) == "S" | ///
substr(grouponly,1,1) == "T"
replace groups = 21 if substr(grouponly,1,1) == "Z"
* Note: group 20 only one observation
* Group 15 is assigned above and now has a value label as well.
label define icd10 1 "Infections" 2 "Neoplasms" 3 "Blood" ///
4 "Endocrine" 5 "Mental" 6 "Nervous" 7 "Eye" 8 "Ear" 9 "Circulatory" ///
10 "Respiratory" 11 "Digestive" 12 "Skin" 13 "Muscoloskeletal" ///
14 "Genitourinary" 15 "Pregnancy" 17 "Congenital" ///
18 "Symptoms" 19 "Injury" 21 "Factors", replace 
label val groups icd10
tab groups,m 


save "$processed_data\matched_hg.dta",replace

}

*********************************************
*** 16. CREATE MATCHED SAMPLE FOR MUTUAL SHOCKS
**** Hospitalizations
*********************************************

{
set trace on
global files_to_delete 
* Mother
** Identify mutual hospitalizations of a mother with her child's hospitalization 
*** +- 1 month
foreach y in 8793 9402 0310 2011 2012 2013 1417 {
	use "$processed_data\matched.dta", clear
	rename shnro_mother shnro
	merge 1:m shnro using "$processed_data\d_inphospital`y'.dta"

	keep if _merge == 3

	g days_difference = tupva - first_inphospital
	g mutual_shock = 1 if days_difference <= 30 & days_difference >= - 30

	keep if mutual_shock == 1 
	keep shnro mutual_shock
	rename shnro shnro_mother
	duplicates drop shnro_mother, force
	save "$processed_data\mutual_shock_month`y'.dta", replace
	global files_to_delete $files_to_delete "mutual_shock_month`y'"
}

*** +- 1 week
foreach y in 8793 9402 0310 2011 2012 2013 1417 {
	use "$processed_data\matched.dta", clear
	rename shnro_mother shnro
	merge 1:m shnro using "$processed_data\d_inphospital`y'.dta"

	keep if _merge == 3

	g days_difference = tupva - first_inphospital
	g mutual_shock = 1 if days_difference <= 7 & days_difference >= - 7

	keep if mutual_shock == 1 
	keep shnro mutual_shock
	rename shnro shnro_mother
	duplicates drop shnro_mother, force
	save "$processed_data\mutual_shock_week`y'.dta", replace
	global files_to_delete $files_to_delete "mutual_shock_week`y'"
}



** Mortality
*** 90-2014 (period taken from the output file name)
*** Flag mothers whose date of death in the cause-of-death register lies
*** within +- 30 days (month) or +- 7 days (week) of the child's hospitalization
foreach window in month week {
	local span = cond("`window'" == "month", 30, 7)

	use "$processed_data\matched.dta", clear
	rename shnro_mother shnro
	merge 1:m shnro using "D:\b44\custom-made\kuolinsyyt_tot_u0681_al4", keep(match)

	* kuolpvm is parsed as a year-month-day string
	gen date_mortality = date(kuolpvm, "YMD")
	format date_mortality %td

	gen days_difference = date_mortality - first_inphospital
	keep if inrange(days_difference, -`span', `span')
	gen mutual_shock = 1

	* one row per mother
	keep shnro mutual_shock
	rename shnro shnro_mother
	duplicates drop shnro_mother, force
	save "$processed_data\mutual_shock_`window'_mortality_19902014.dta", replace
	global files_to_delete $files_to_delete "mutual_shock_`window'_mortality_19902014"
}

* Father
/*
- a father can appear more than once (divorced and then children in another family)
- number each father's rows and store every rank as its own subsample, so that
  the father id is unique inside each subsample file
- NOTE(review): rows ranked 4 or higher are not written to any subsample;
  confirm that no father appears more than three times
*/

use "$processed_data\matched.dta", clear
rename shnro_father shnro
by shnro, sort: gen tag = _n
* rows without a father id are not used
drop if shnro == ""
forvalues rank = 1/3 {
	preserve
	keep if tag == `rank'
	save "$processed_data\subsample_f`rank'.dta", replace
	global files_to_delete $files_to_delete "subsample_f`rank'"
	restore
}

** Hospitalizations
*** Flag fathers with an inpatient admission within +- 30 days (month) or
*** +- 7 days (week) of the child's hospitalization, separately for each
*** father subsample and hospital-register period
foreach window in month week {
	local span = cond("`window'" == "month", 30, 7)

	foreach y in 8793 9402 0310 2011 2012 2013 1417 {
		forvalues k = 1/3 {
			use "$processed_data\subsample_f`k'.dta", clear
			merge 1:m shnro using "$processed_data\d_inphospital`y'.dta", keep(match)

			gen days_difference = tupva - first_inphospital
			keep if inrange(days_difference, -`span', `span')

			* a file is written only when at least one father falls in the window
			* (original note: no observations for subsample = 3)
			if _N > 0 {
				gen mutual_shock_f = 1
				keep shnro shnro_mother mutual_shock_f
				rename shnro shnro_father
				duplicates drop shnro_father shnro_mother, force
				save "$processed_data\mutual_shock_`window'`y'_f`k'.dta", replace
				global files_to_delete $files_to_delete "mutual_shock_`window'`y'_f`k'"
			}
		}
	}
}


** mortality
*** Flag fathers whose date of death lies within +- 30 days (month) or
*** +- 7 days (week) of the child's hospitalization, per father subsample
foreach window in month week {
	local span = cond("`window'" == "month", 30, 7)

	forvalues k = 1/3 {
		use "$processed_data\subsample_f`k'.dta", clear
		merge 1:m shnro using "D:\b44\custom-made\kuolinsyyt_tot_u0681_al4", keep(match)

		* kuolpvm is parsed as a year-month-day string
		gen date_mortality = date(kuolpvm, "YMD")
		format date_mortality %td

		gen days_difference = date_mortality - first_inphospital
		keep if inrange(days_difference, -`span', `span')

		* a file is written only when at least one father falls in the window
		* (original note: no obs for subsample 3)
		if _N > 0 {
			gen mutual_shock_f = 1
			keep shnro shnro_mother mutual_shock_f
			rename shnro shnro_father
			duplicates drop shnro_father shnro_mother, force

			save "$processed_data\mutual_shock_`window'_mortality_19902014_`k'.dta", replace
			global files_to_delete $files_to_delete "mutual_shock_`window'_mortality_19902014_`k'"
		}
	}
}


* CLEAN FINAL DATA WITH A VARIABLE IF MOTHER OR FATHER HAD A MUTUAL SHOCK
** +- 1 month
*** Mothers
* The per-period mother files are saved unconditionally earlier in this
* section, so they are merged without capture and _merge is always dropped.
* (Previously _rc was tested after an uncaptured merge, i.e. a stale return
* code from some earlier capture decided whether _merge was dropped.)
* The update option lets each file fill in mutual_shock where it is still missing.
use "$processed_data\matched.dta", clear 
foreach y in 8793 9402 0310 2011 2012 2013 1417 { 
	merge 1:1 shnro_mother using "$processed_data\mutual_shock_month`y'.dta", update
	drop _merge
}
merge 1:1 shnro_mother using "$processed_data\mutual_shock_month_mortality_19902014.dta", update 
drop _merge
save "$processed_data\matched_mutual_month", replace

*** Fathers
* Father files exist only when at least one father fell inside the window,
* so each file is checked for existence first. The merge itself is not
* captured, so a genuine merge error stops the run with its own message.
* Only subsamples 1 and 2 are merged, as in the original code
* NOTE(review): a file for subsample 3 would be ignored here - confirm it never exists
foreach father_subsample in 1 2  {
	foreach y in 8793 9402 0310 2011 2012 2013 1417 { 
		local shockfile "$processed_data\mutual_shock_month`y'_f`father_subsample'.dta"
		capture confirm file "`shockfile'"
		if _rc == 0 {
			merge 1:1 shnro_father shnro_mother using "`shockfile'", update
			drop _merge
		}
	}

	local shockfile "$processed_data\mutual_shock_month_mortality_19902014_`father_subsample'.dta"
	capture confirm file "`shockfile'"
	if _rc == 0 {
		merge 1:1 shnro_father shnro_mother using "`shockfile'", update
		drop _merge
	}
}
save "$processed_data\matched_mutual_month", replace

** Keep sample without parents shock
* (the data in memory equal the file just saved, so no reload is needed)
keep if mutual_shock !=  1 
keep if mutual_shock_f != 1

save "$processed_data\matched_mutual_month", replace

** +- 1 week
*** Mothers
* The per-period mother files are saved unconditionally earlier in this
* section, so they are merged without capture: a failing merge now stops
* with its own error message instead of a later "_merge not found".
* The update option lets each file fill in mutual_shock where it is still missing.
use "$processed_data\matched.dta", clear 
foreach y in 8793 9402 0310 2011 2012 2013 1417 { 
	merge 1:1 shnro_mother using "$processed_data\mutual_shock_week`y'.dta", update
	drop _merge
}
merge 1:1 shnro_mother using "$processed_data\mutual_shock_week_mortality_19902014.dta", update 
drop _merge
save "$processed_data\matched_mutual_week", replace

*** Fathers
* Father files exist only when at least one father fell inside the window,
* so each file is checked for existence first. The merge itself is not
* captured, so a genuine merge error is reported as such.
* Only subsamples 1 and 2 are merged, as in the original code
* NOTE(review): a file for subsample 3 would be ignored here - confirm it never exists
foreach father_subsample in 1 2  {
	foreach y in 8793 9402 0310 2011 2012 2013 1417 { 
		local shockfile "$processed_data\mutual_shock_week`y'_f`father_subsample'.dta"
		capture confirm file "`shockfile'"
		if _rc == 0 {
			merge 1:1 shnro_father shnro_mother using "`shockfile'", update
			drop _merge
		}
	}

	local shockfile "$processed_data\mutual_shock_week_mortality_19902014_`father_subsample'.dta"
	capture confirm file "`shockfile'"
	if _rc == 0 {
		merge 1:1 shnro_father shnro_mother using "`shockfile'", update
		drop _merge
	}
}
save "$processed_data\matched_mutual_week", replace

* keep sample without parents shock
* (the data in memory equal the file just saved, so no reload is needed)
keep if mutual_shock !=  1 
keep if mutual_shock_f != 1

save "$processed_data\matched_mutual_week", replace


* Housekeeping: erase every intermediate file registered during this section.
* Errors (for example a file that was never written) are ignored on purpose.
*di "$files_to_delete"
foreach scratch of global files_to_delete {
	capture erase "$processed_data\\`scratch'.dta"
}

}

*********************************************
*** 17. CREATE MATCHED SAMPLE FOR MUTUAL SHOCKS
**** Mortality
*********************************************

{
    
* Start with an empty list of temporary files; the end of this section erases them
global files_to_delete

* Mother
** Hospitalizations
*** Flag mothers with an inpatient admission within +- 30 days (month) or
*** +- 7 days (week) of first_mortality, per hospital-register period
foreach window in month week {
	local span = cond("`window'" == "month", 30, 7)

	foreach y in 8793 9402 0310 2011 2012 2013 1417 {
		use "$processed_data\matched_mortality.dta", clear
		rename shnro_mother shnro
		* keep only mothers that appear in the admissions file of this period
		merge 1:m shnro using "$processed_data\d_inphospital`y'.dta", keep(match)

		* signed gap in days between the mother's admission and first_mortality
		gen days_difference = tupva - first_mortality
		keep if inrange(days_difference, -`span', `span')
		gen mutual_shock = 1

		* one row per mother
		keep shnro mutual_shock
		rename shnro shnro_mother
		duplicates drop shnro_mother, force
		save "$processed_data\mutual_shock_`window'`y'.dta", replace
		global files_to_delete $files_to_delete "mutual_shock_`window'`y'"
	}
}



** Mortality
*** 90-2014 (period taken from the output file name)
*** Flag mothers whose date of death in the cause-of-death register lies
*** within +- 30 days (month) or +- 7 days (week) of first_mortality
foreach window in month week {
	local span = cond("`window'" == "month", 30, 7)

	use "$processed_data\matched_mortality.dta", clear
	rename shnro_mother shnro
	merge 1:m shnro using "D:\b44\custom-made\kuolinsyyt_tot_u0681_al4", keep(match)

	* kuolpvm is parsed as a year-month-day string
	gen date_mortality = date(kuolpvm, "YMD")
	format date_mortality %td

	gen days_difference = date_mortality - first_mortality
	keep if inrange(days_difference, -`span', `span')
	gen mutual_shock = 1

	* one row per mother
	keep shnro mutual_shock
	rename shnro shnro_mother
	duplicates drop shnro_mother, force
	save "$processed_data\mutual_shock_`window'_mortality_19902014.dta", replace
	global files_to_delete $files_to_delete "mutual_shock_`window'_mortality_19902014"
}

* Father
/*
- a father can appear more than once (divorced and then children in another family)
- number each father's rows and store every rank as its own subsample, so that
  the father id is unique inside each subsample file
- NOTE(review): rows ranked 4 or higher are not written to any subsample;
  confirm that no father appears more than three times
*/

use "$processed_data\matched_mortality.dta", clear
rename shnro_father shnro
by shnro, sort: gen tag = _n
* rows without a father id are not used
drop if shnro == ""
forvalues rank = 1/3 {
	preserve
	keep if tag == `rank'
	save "$processed_data\subsample_f`rank'.dta", replace
	global files_to_delete $files_to_delete "subsample_f`rank'"
	restore
}

** Hospitalizations
*** Flag fathers with an inpatient admission within +- 30 days (month) or
*** +- 7 days (week) of first_mortality, separately for each father
*** subsample and hospital-register period
foreach window in month week {
	local span = cond("`window'" == "month", 30, 7)

	foreach y in 8793 9402 0310 2011 2012 2013 1417 {
		forvalues k = 1/3 {
			use "$processed_data\subsample_f`k'.dta", clear
			merge 1:m shnro using "$processed_data\d_inphospital`y'.dta", keep(match)

			gen days_difference = tupva - first_mortality
			keep if inrange(days_difference, -`span', `span')

			* a file is written only when at least one father falls in the window
			* (original note: no observations for subsample = 3)
			if _N > 0 {
				gen mutual_shock_f = 1
				keep shnro shnro_mother mutual_shock_f
				rename shnro shnro_father
				duplicates drop shnro_father shnro_mother, force
				save "$processed_data\mutual_shock_`window'`y'_f`k'.dta", replace
				global files_to_delete $files_to_delete "mutual_shock_`window'`y'_f`k'"
			}
		}
	}
}


** mortality
*** Flag fathers whose date of death lies within +- 30 days (month) or
*** +- 7 days (week) of first_mortality, per father subsample
foreach window in month week {
	local span = cond("`window'" == "month", 30, 7)

	forvalues k = 1/3 {
		use "$processed_data\subsample_f`k'.dta", clear
		merge 1:m shnro using "D:\b44\custom-made\kuolinsyyt_tot_u0681_al4", keep(match)

		* kuolpvm is parsed as a year-month-day string
		gen date_mortality = date(kuolpvm, "YMD")
		format date_mortality %td

		gen days_difference = date_mortality - first_mortality
		keep if inrange(days_difference, -`span', `span')

		* a file is written only when at least one father falls in the window
		* (original note: no obs for subsample 3)
		if _N > 0 {
			gen mutual_shock_f = 1
			keep shnro shnro_mother mutual_shock_f
			rename shnro shnro_father
			duplicates drop shnro_father shnro_mother, force

			save "$processed_data\mutual_shock_`window'_mortality_19902014_`k'.dta", replace
			global files_to_delete $files_to_delete "mutual_shock_`window'_mortality_19902014_`k'"
		}
	}
}


* CLEAN FINAL DATA WITH A VARIABLE IF MOTHER OR FATHER HAD A MUTUAL SHOCK
** +- 1 month
*** Mothers
* The per-period mother files are saved unconditionally earlier in this
* section, so they are merged without capture. The update option lets each
* file fill in mutual_shock where it is still missing.
use "$processed_data\matched_mortality.dta", clear 
foreach y in 8793 9402 0310 2011 2012 2013 1417 {
	merge 1:1 shnro_mother using "$processed_data\mutual_shock_month`y'.dta", update
	drop _merge
}
merge 1:1 shnro_mother using "$processed_data\mutual_shock_month_mortality_19902014.dta", update 
drop _merge
save "$processed_data\matched_mortality_mutual_month", replace

*** Fathers
* Father files exist only when at least one father fell inside the window,
* so each file is checked for existence first. The merge itself is not
* captured: previously a captured merge that failed for any reason other
* than a missing file surfaced only as a misleading "_merge not found".
* Only subsamples 1 and 2 are merged, as in the original code
* NOTE(review): a file for subsample 3 would be ignored here - confirm it never exists
foreach father_subsample in 1 2  {
	foreach y in 8793 9402 0310 2011 2012 2013 1417 {
		local shockfile "$processed_data\mutual_shock_month`y'_f`father_subsample'.dta"
		capture confirm file "`shockfile'"
		if _rc == 0 {
			merge 1:1 shnro_father shnro_mother using "`shockfile'", update
			drop _merge
		}
	}

	local shockfile "$processed_data\mutual_shock_month_mortality_19902014_`father_subsample'.dta"
	capture confirm file "`shockfile'"
	if _rc == 0 {
		merge 1:1 shnro_father shnro_mother using "`shockfile'", update
		drop _merge
	}
}
save "$processed_data\matched_mortality_mutual_month", replace

** Keep sample without parents shock
* (the data in memory equal the file just saved, so no reload is needed)
keep if mutual_shock !=  1 
keep if mutual_shock_f != 1

save "$processed_data\matched_mortality_mutual_month", replace

** +- 1 week
*** Mothers
* The per-period mother files are merged with the update option so that
* each file can fill in mutual_shock where it is still missing. Without
* update, the variable created by the first period would be kept as is and
* shocks found in all later hospital periods would be silently discarded.
use "$processed_data\matched_mortality.dta", clear
foreach y in 8793 9402 0310 2011 2012 2013 1417 { 
	merge 1:1 shnro_mother using "$processed_data\mutual_shock_week`y'.dta", update
	drop _merge
}
merge 1:1 shnro_mother using "$processed_data\mutual_shock_week_mortality_19902014.dta", update 
drop _merge
save "$processed_data\matched_mortality_mutual_week", replace

*** Fathers
* Father files exist only when at least one father fell inside the window,
* so each file is checked for existence first. The merge itself is not
* captured, so a genuine merge error is reported as such.
* Only subsamples 1 and 2 are merged, as in the original code
* NOTE(review): a file for subsample 3 would be ignored here - confirm it never exists
foreach father_subsample in 1 2  {
	foreach y in 8793 9402 0310 2011 2012 2013 1417 { 
		local shockfile "$processed_data\mutual_shock_week`y'_f`father_subsample'.dta"
		capture confirm file "`shockfile'"
		if _rc == 0 {
			merge 1:1 shnro_father shnro_mother using "`shockfile'", update
			drop _merge
		}
	}

	local shockfile "$processed_data\mutual_shock_week_mortality_19902014_`father_subsample'.dta"
	capture confirm file "`shockfile'"
	if _rc == 0 {
		merge 1:1 shnro_father shnro_mother using "`shockfile'", update
		drop _merge
	}
}
save "$processed_data\matched_mortality_mutual_week", replace

* keep sample without parents shock
* (the data in memory equal the file just saved, so no reload is needed)
keep if mutual_shock !=  1 
keep if mutual_shock_f != 1

save "$processed_data\matched_mortality_mutual_week", replace


* Housekeeping: erase every intermediate file registered during this section.
* Errors (for example a file that was never written) are ignored on purpose.
foreach scratch of global files_to_delete {
	capture erase "$processed_data\\`scratch'.dta"
}

}
*********************************************
*** 18. CREATE SAMPLE OF AGGREGATED CHILD HOSPITAL STAYS
*********************************************

{


* Inpatient hospitalizations 1987-1993
* NOTE(review): the source file is named vos9093 while the output is labelled
* 8793 - confirm which years the file actually covers
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
* keep every child, with or without admissions; drop admissions of non-sample persons
merge 1:m shnro using "D:\b44\custom-made\vos9093_shnro", keep(master match)

** drop pregnancies (diagnosis codes whose first two characters are 63 to 67)
rename pdg ICD9
drop if inlist(substr(ICD9, 1, 2), "63", "64", "65", "66", "67")

** keep hospital admissions only (at least 1 day between admission and discharge);
** children without any admission have a missing length of stay and drop out here
gen lengthstay = lpvm - tupva
keep if lengthstay >= 1 & !missing(lengthstay)

** Keep child id, mother id, admission and discharge dates, and length of stay
keep shnro aiti_shnro tupva lengthstay lpvm
rename tupva tupva_inphospital

save "$processed_data\visits8793child.dta", replace

* Inpatient hospitalizations 1994-2002 and 2003-2010
* The period code is used directly in both the input and the output file name
foreach y in 9402 0310 {

	use "$processed_data\shnrochild.dta", clear
	duplicates drop shnro, force
	* keep every child, with or without admissions; drop admissions of non-sample persons
	merge 1:m shnro using  "D:\b44\custom-made\vos`y'_shnro", keep(master match)

	** drop hospitalizations with pregnancies: first two characters 63 to 67 in
	** the ICD9 field, or a code starting with O in the ICD10 field
	rename (pdg pdgo) (ICD9 ICD10)
	drop if inlist(substr(ICD9, 1, 2), "63", "64", "65", "66", "67") | ///
		substr(ICD10, 1, 1) == "O"

	** keep hospital admissions only (at least 1 day between admission and discharge)
	gen lengthstay = lpvm - tupva
	keep if lengthstay >= 1 & !missing(lengthstay)

	** Keep child id, mother id, admission and discharge dates, and length of stay
	keep shnro aiti_shnro tupva lengthstay lpvm
	rename tupva tupva_inphospital

	save "$processed_data\visits`y'child.dta", replace
}

* Inpatient hospitalizations 2011, 2012, 2013 (one register file per year)
foreach y in 2011 2012 2013 {

	use "$processed_data\shnrochild.dta", clear
	duplicates drop shnro, force
	* keep every child, with or without admissions; drop admissions of non-sample persons
	merge 1:m shnro using  "D:\b44\custom-made\fleed_hilmo_`y'_shnro", keep(master match)

	** drop pregnancies (codes starting with O)
	rename pdgo ICD10
	drop if substr(ICD10, 1, 1) == "O"

	** define hospitalizations (at least one night):
	** admission and discharge arrive as day-month-year strings
	gen tupva = date(tulopv, "DMY")
	gen lpvm = date(lahtopv, "DMY")
	format tupva lpvm %td

	gen lengthstay = lpvm - tupva
	keep if lengthstay >= 1 & !missing(lengthstay)

	** Keep child id, mother id, admission and discharge dates, and length of stay
	keep shnro aiti_shnro tupva lengthstay lpvm
	rename tupva tupva_inphospital

	save "$processed_data\visits`y'child.dta", replace
}

* Inpatient hospitalizations 2014-2017
use "$processed_data\shnrochild.dta", clear
duplicates drop shnro, force
* keep every child, with or without admissions; drop admissions of non-sample persons
merge 1:m shnro using  "D:\b44\custom-made\hilmo_2014_2017_shnro", keep(master match)

** drop pregnancies (codes starting with O)
rename pdgo ICD10
drop if substr(ICD10, 1, 1) == "O"

** keep hospitalizations (at least one night):
** admission and discharge are converted from datetime to daily dates first
gen tupva1 = dofc(tupva)
gen lpvm1 = dofc(lpvm)
format tupva1 lpvm1 %td

gen lengthstay = lpvm1 - tupva1
keep if lengthstay >= 1 & !missing(lengthstay)

** Keep child id, mother id, admission and discharge dates, and length of stay
** (the discharge date keeps the name lpvm1 in this file)
keep shnro aiti_shnro tupva1 lengthstay lpvm1
rename tupva1 tupva_inphospital

save "$processed_data\visits1417child.dta", replace
 
* Stack the per-period stay files: one row per inpatient stay
use "$processed_data\visits8793child.dta", clear
foreach period in 9402 0310 2011 2012 2013 1417 {
	append using "$processed_data\visits`period'child.dta"
}

rename shnro lapsi_shnro

drop if missing(lapsi_shnro) // Why are these missing?

* counter that is summed in both collapses below
gen n = 1

* Generate total number of hospital stays per child
preserve
*** keep one obs per kid
collapse (sum) n_stay = n, by(lapsi_shnro)
summarize, detail
save "$processed_data\stays_all.dta", replace
restore

* Generate annual number of hospital stays per child,
* stored wide with one n_stay column per admission year
gen year = year(tupva_inphospital)
collapse (sum) n_stay = n, by(lapsi_shnro year)
summarize, detail
reshape wide n_stay, i(lapsi_shnro) j(year)
save "$processed_data\stays_year.dta", replace

* Remove files not used in analysis: the per-period child stay files written
* in this section plus four outpatient visit files that are not created here.
* Errors (for example a file that does not exist) are ignored on purpose.
local scratch_files visits8793child visits9402child visits0310child ///
	visits2011child visits2012child visits2013child visits1417child ///
	visits9802outpatient visits0307outpatient visits0810outpatient visits1417outpatient
foreach scratch of local scratch_files {
	capture erase "$processed_data\\`scratch'.dta"
}


}


*********************************************
*** 19. CREATE SAMPLE OF CHRONIC VS NONCHRONIC CONDITIONS
*********************************************

{

/* Load the external chronic-condition classification from CSV */
import delimited using ///
	"W:\Ramon\Child_Health_Shocks\info\classification_chronic_conditions.csv", ///
	varnames(1) clear

describe, fullnames

* Clean variables
** Chronic indicator: strip the single quotes around the value, then make it numeric
replace chronicindicator = subinstr(chronicindicator, "'", "", .)
destring chronicindicator, replace
tabulate chronicindicator, missing

** Body system: tabulated before and after cleaning; the text None becomes missing
** (this variable is inspected here but not kept in the saved file)
tabulate bodysystem, missing
replace bodysystem = subinstr(bodysystem, "'", "", .)
replace bodysystem = "" if bodysystem == "None"
destring bodysystem, replace
tabulate bodysystem, missing

** Diagnosis code without the surrounding single quotes, named ICD10
gen ICD10 = subinstr(icd10cmcode, "'", "", .)

list in 1/10

* lookup table: one chronic indicator per diagnosis code
keep ICD10 chronicindicator

save "$processed_data\chronic_conditions.dta", replace

* Attach the chronic indicator to the matched sample by diagnosis code
use "$processed_data\matched_hg.dta", clear
merge m:1 ICD10 using "$processed_data\chronic_conditions.dta"

* check the match quality before discarding classification-only codes
tabulate chronicindicator _merge, missing
drop if _merge == 2
drop _merge

tabulate chronicindicator, missing
tabulate groups chronicindicator
describe

save "$processed_data\matched_chronic.dta", replace

}











